*Replication file 01a_Imposition_limited_sample_EU
*Article: Counterfactual Coercion: Could harsher sanctions against Russia have prevented the worst?
*Authors: Thies Niemeier, Gerald Schneider


***************************************************************
***EU***
***************************************************************

set seed 1234

* Install packages
ssc install "brglm"
ssc install "outreg2"

*Prepare data
use "Dataset.dta", clear
keep if sender=="EU"

** Filter for cases of importance
keep if pot_sanctioned_countries == 1

gen ln_oil_gas_value_2014 = ln(oil_gas_value_2014+1)
gen sender_colony=0
replace sender_colony=1 if ht_colonial==2 | ht_colonial==3 | ht_colonial==6 | ht_colonial==7 | ht_colonial==8 | ht_colonial==9 | ht_colonial==10

gen sender_additional=cond(threatUS==1 | impositionUS == 1, 1, 0)
gen only_threat=cond(threatEU==1 & impositionEU == 0, 1, 0)

gen sender_trade = ln_EU_Trade_Eurostat
gen coup_dummy = coup1
replace coup_dummy = 0 if coup_dummy == 1
replace coup_dummy = 1 if coup_dummy == 2

* Dependent variable : 1 if a threat or sanction case was ongoing in the dyad
gen sanction_threat = sanction_dyad
replace sanction_threat = 1 if threat_dyad==1
tab sanction_threat
gen sanction_train= sanction_threat if year < 2009
gen sanction_test= sanction_threat if year >= 2009

* lag time-series variables
sort ccodecow year
by ccodecow: gen l_v2x_polyarchy = v2x_polyarchy[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_gd_ptss = gd_ptss[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_coup_dummy = coup_dummy[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_one_sided_violence = one_sided_violence[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_conflict = conflict[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_mid_terr_integrity = mid_terr_integrity[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_ln_GDPpc_imputed = ln_GDPpc_imputed[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_sender_trade = sender_trade[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_ln_oil_gas_value = ln_oil_gas_value_2014[_n-1] if year==year[_n-1]+1
by ccodecow: gen l_defense_alliance = defense_alliance[_n-1] if year==year[_n-1]+1

* create dummy variables
tabulate l_gd_ptss, generate (pol_terr)

* sortieren nach Jahr, zur Vorbereitung RF model
gen u=0
replace u=1 if year >= 2009
sort u

** Imposition
* Random Forest Model
rforest sanction_threat l_v2x_polyarchy pol_terr* l_coup_dummy ///
l_one_sided_violence l_conflict l_mid_terr_integrity ///
l_ln_GDPpc_imputed l_sender_trade l_ln_oil_gas_value ///
sender_colony l_defense_alliance in 1/2789, type(class) iter(1500) numvars(15)

*Variable Importance
ereturn list
matrix list e(importance)
* write Variable importance to excel file
putexcel set "Supplemental_Material\Variable_Importance\Variable_Importance_Imposition_EU_RF.xlsx", sheet("M") replace
putexcel A1=matrix(e(importance)), names

* Evaluation of model performance
** predictions
predict randonsEU
predict randonsEU0 randonsEU1, pr

* Confusion Matrix
** Sensitivity 68.2, Specificity 91.4
tab2xl sanction_test randonsEU using Main_Article/EU_Imposition_RF_Confusion_Matrix, row(1) col(1)
diagtest sanction_test randonsEU

* AUPR .55
prtab sanction_test randonsEU1, title("EU", box bexpand) l2title("Baseline model", box bexpand) 
graph save "Graph" "Supplemental_Material\Prediction_Output\Roc-curves\AUPR_Imposition_RF_EU.gph", replace

* .42
kap sanction_test randonsEU

*******************************************************************
******* Compare to logistic regression ability ********************
*******************************************************************

sort ccodecow year
egen sum_sanction = sum(sanction_dyad), by(ccodecow)
gen dum_country = ccodecow
replace dum_country = 0 if sum_sanction==0
xtset ccodecow year

* Original
brglm sanction_train L.v2x_polyarchy i.L.gd_ptss i.L.coup_dummy i.L.one_sided_violence i.L.conflict i.L.mid_terr_integrity L.ln_GDPpc_imputed L.sender_trade L.ln_oil_gas_value_2014 i.L.sender_colony i.L.defense_alliance  i.dum_country i.year, vce(cluster ccodecow)

predict onsEU_logistic
gen prob_sanc_onsEU_logistic = 1/(1+exp(-onsEU_logistic))
gen bin_prob_sanc_onsEU_logistic = cond(prob_sanc_onsEU_logistic > .5, 1,0)
replace bin_prob_sanc_onsEU_logistic=. if missing(prob_sanc_onsEU_logistic)
tab sanction_test bin_prob_sanc_onsEU_logistic
tab2xl sanction_test bin_prob_sanc_onsEU_logistic using Supplemental_Material\Prediction_Output\Confusion_Matrixes\EU_Imposition_PMLFE, col(1) row(1)

*Evaluation
* Sensitivity 76.3 Specificity 89.6
diagtest sanction_test bin_prob_sanc_onsEU_logistic
* kappa .32
kap sanction_test bin_prob_sanc_onsEU_logistic
* AUPR .64
prtab sanction_test prob_sanc_onsEU_logistic, title("EU", box bexpand) l2title("PML-FE", box bexpand)
graph save "Graph" "Supplemental_Material\Prediction_Output\ROC-curves\AUPR_Imposition_PMLFE_EU.gph", replace


* Random forest only for cases with data
gen helper_sanction_test=sanction_test if!missing(prob_sanc_onsEU_logistic)
tab2xl helper_sanction_test randonsEU using Supplemental_Material\Prediction_Output\Confusion_Matrixes\EU_Imposition_RF_lim_sample, col(1) row(1)

* AUPR .54
prtab helper_sanction_test randonsEU1, l2title("RF, sample limited", box bexpand) 
graph save "Graph" "Supplemental_Material\Prediction_Output\ROC-curves\AUPR_Imposition_RF_EU_lim_sample", replace

* kappa .42
kap helper_sanction_test randonsEU

* Sensitivity 68.8, Specificity 91
diagtest helper_sanction_test randonsEU
